Total: 1H30
“Machine Learning algorithms enable the computers to learn from data, and even improve themselves, without being explicitly programmed” (Arthur Samuel).
import pandas as pd
from sklearn.BRANCH import MODEL_NAME
from sklearn.metrics import METRIC_NAME
from sklearn.model_selection import train_test_split
# Generic scikit-learn workflow. BRANCH, MODEL_NAME, METRIC_NAME and 'TARGET'
# appear to be placeholders for the concrete sklearn module, estimator,
# metric and label column.
df = pd.read_csv('data.csv')  # load data
# Split data into train/test (70/30)
X, y = df.drop(['TARGET'], axis=1), df['TARGET']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=42)
MODEL = MODEL_NAME()  # build model
MODEL.fit(X_train, y_train)  # train model
y_pred_MODEL = MODEL.predict(X_test)  # result
# Pass true and predicted labels as two separate arguments, not as one tuple.
METRIC = METRIC_NAME(y_test, y_pred_MODEL)  # eval
# by Abdul Rahid
By synergy37AI, in Medium: Decision Trees: Lesson 101
By Tony Yiu, in Medium: Understanding Random Forest
Note: A small K can cause overfitting (the model learns from noise); a large K can cause underfitting (the neighbors are too far away).
Accept a job offer based on location (remote or face-to-face), salary and coffee!
Conditions:
jobs salary places coffee accept
0 data-analyst 2500 remote False False
1 data-scientist 3000 remote False False
2 data-scientist 3500 remote False True
3 data-scientist 5000 New York True True
4 data-engineer 4000 Madrid True True
5 devops 3500 Lisbon False False
6 qa-tester 3800 Porto True False
def accept_offer(places: str, salary: int, coffee: bool) -> bool:
    """Decide whether to accept a job offer using hand-written rules.

    An offer is accepted when any of the following holds:
      * it is remote and pays at least 3500;
      * it pays at least 4000 and includes coffee;
      * it pays at least 4500.

    Args:
        places: Work location; the exact string "remote" marks a remote job.
        salary: Offered salary.
        coffee: Whether coffee is provided.

    Returns:
        True if the offer should be accepted, False otherwise.
    """
    # Guard clauses replace the original nested if/else pyramid.
    if places == "remote" and salary >= 3500:
        return True
    if salary >= 4000 and coffee:
        return True
    return salary >= 4500
# Apply the rule-based function to three example offers.
offer1 = accept_offer("New York", 4400, False)
offer2 = accept_offer("Berlin", 5000, False)
offer3 = accept_offer("remote", 3600, False)
print(f"[{offer1} {offer2} {offer3}]")
# Output: [False True True]
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
df = pd.read_csv('data/jobs.csv')  # load data
# Preprocessing data: encode 'places' as 1 for remote and 0 for any other
# location. The result is assigned back to the column because chained
# `df[col].replace(..., inplace=True)` is not reliable under pandas
# Copy-on-Write, and this yields an integer column instead of a mix of
# 1 and '0'.
df['places'] = (df['places'] == 'remote').astype(int)
X, y = df.drop(['jobs', 'accept'], axis=1), df.accept
MODEL = RandomForestClassifier(random_state=1)  # build model
MODEL.fit(X, y)  # train model
# New offers to classify; keys are presumably the same feature columns the
# model was trained on (salary, places, coffee) -- verify against the CSV.
offers = {'salary': [4400, 5000, 3600],
          'places': [0, 0, 1],
          'coffee': [0, 0, 0]}
offer = pd.DataFrame(offers)
print(MODEL.predict(offer))
# Expected output: [False True True]
5 minutes
Follow @tamagusko on LinkedIn to stay in touch!
© 2022 Tamagusko